# -*- coding:utf8 -*-
# !/usr/bin/env python

"""
#全国企业信用信息公示系统（宁夏）
#维护黄羽
"""
import re
import urllib
from bs4 import BeautifulSoup
from utils import kill_captcha
from scpy.logger import get_logger
import table
import requests
import traceback
import time
import copy
import sd_template_dict as TE

SLEEP_TIME = 1  # seconds to pause between successive page requests (politeness delay)

logger = get_logger(__file__)

# Desktop Chrome User-Agent sent with every request to the gsxt site.
ua = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36"


def extract_share_holder_detail(detail_res):
    """Extract one shareholder record from a shareholder-detail page.

    :param detail_res: raw HTML of the detail page (any object; passed through str())
    :return: dict in the shareHolderList layout; fields the page does not
             provide stay ''
    """
    detail_td = re.findall("<td.*?>(.*?)</td>", str(detail_res), re.S)

    # BUGFIX: the original indexed detail_td[0] unconditionally and raised
    # IndexError on a page with no <td> cells.
    shareholder_name = detail_td[0] if detail_td else ""
    # cell 1: subscribed capital; cell 5: contribution date (when present)
    subConam = table.money_notclean(detail_td[1]) if len(detail_td) > 1 else ""
    conDate = table.parse_time(detail_td[5]) if len(detail_td) > 6 else ""
    return {"shareholderType": "", "shareholderName": shareholder_name,
            "regCapCur": "", "country": "", "fundedRatio": "",
            "subConam": subConam, "conDate": conDate}


def download_captcha_kill(companyName):
    """
    Download the captcha image, crack it, then search for the company and
    collect the code tuple used to build the follow-up referer and URLs.
    :param companyName: company name to search for
    :return:
            post_args_list ([qylx, nbxh, qylxFlag, zch, companyName]) when
            the captcha is cracked and the company exists;
            None when the company does not exist;
            '' when the cracked captcha was rejected (caller should retry);
            raises when the cracking service or the site cannot be reached
    """
    img_url = r'http://gsxt.ngsh.gov.cn/ECPS/verificationCode.jsp?_=1444218779041'
    img_headers = {
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': ua,
    }
    req = requests.session()
    req.headers = img_headers
    img = req.get(img_url, timeout=100).content

    if not img:  # an empty body means the captcha download failed
        logger.info('下载的验证码为空,:%s' % img)
        return ''

    res_code = kill_captcha(img, "nx", "jpg")
    if not res_code or len(res_code) > 100 or str(res_code) in ['None', 'wrong']:
        logger.error('破解验证码服务器返回的验证码格式错误')
        logger.error('验证码为:%s' % res_code)
        # Return '' so the caller retries. len(res_code) > 100 means the
        # cracking server answered with its 500 error page instead of a code.
        return ''

    # Send the recognized captcha digits back to the gsxt site
    check_data_1 = {'password': res_code}
    check_data_2 = {
        'isEntRecord': '',
        'password': res_code,
        'loginInfo.regno': '',
        'loginInfo.entname': '',
        'loginInfo.idNo': '',
        'loginInfo.mobile': '',
        'loginInfo.password': '',
        'loginInfo.verificationCode': '',
        'otherLoginInfo.name': '',
        'otherLoginInfo.cardId': '',
        'otherLoginInfo.mobile': '',
        'otherLoginInfo.unit': '',
        'otherLoginInfo.dept': '',
        'otherLoginInfo.password': '',
        'otherLoginInfo.verificationCode': '',
        'selectValue': companyName,
    }

    # Two requests are required: first verify the captcha, then run the query
    check_url_1 = r'http://gsxt.ngsh.gov.cn/ECPS/qyxxgsAction_checkVerificationCode.action'
    check_url_2 = r'http://gsxt.ngsh.gov.cn/ECPS/qyxxgsAction_queryXyxx.action'

    res_message = ''
    TRYTIME = 2  # the verification POST is retried up to 2 times
    a_trt_time = TRYTIME
    while not res_message and a_trt_time > 0:
        time.sleep(2)
        res_message = req.post(check_url_1, data=check_data_1, timeout=100).content
        if res_message:
            logger.info('网站返回验证码消息为:%s' % res_message)
            break
        else:
            logger.info('网站返回验证码消息为:%s' % res_message)
            a_trt_time -= 1
            continue
    if res_message == '{"message":"ok"}':
        logger.info('网站返回验证码消息为:%s' % res_message)
        pass
    elif res_message or res_message == '{"message":"验证码输入错误"}':
        # NOTE(review): any non-"ok" reply is treated as a wrong captcha; an
        # EMPTY reply after both attempts falls through and the query below
        # is attempted anyway — confirm that is intended.
        logger.info('网站返回验证码消息为:%s' % res_message)
        return ''

    # Run the search to obtain the company's registration codes
    time.sleep(2)
    res = req.post(check_url_2, data=check_data_2, timeout=100).content
    if re.findall('您搜索的条件无查询结果', res):
        return None

    companyInfo_soup = BeautifulSoup(res, 'html5lib')
    qyList_soup = companyInfo_soup.find_all(id='qyList')

    # Unexpected page contents: signal the caller to retry
    if qyList_soup:
        pass
    else:
        logger.info('网站返回网页部错误，内容为:%s' % str(companyInfo_soup))
        return ''

    # qyList = str(qyList_soup[0])
    # alist = re.compile(r'''.+showJbxx(.+)">?''').findall(qyList)

    # Pull (qylx, nbxh, qylxFlag, zch) out of the result-link href
    # qylx = re.findall('a href="qyxxgsAction_initQyxyxxMain\.action.*?qylx=(.*?)&', res)
    # nbxh = re.findall('a href="qyxxgsAction_initQyxyxxMain\.action.*?nbxh=(.*?)&', res)
    # qylxFlag = re.findall('a href="qyxxgsAction_initQyxyxxMain\.action.*?qylxFlag=(.*?)&', res)
    # zch = re.findall('a href="qyxxgsAction_initQyxyxxMain\.action.*?zch=(.*?)', res)
    res_list = re.findall(
        'a href="qyxxgsAction_initQyxyxxMain\.action.*?qylx=(.*?)&nbxh=(.*?)&qylxFlag=(.*?)&zch=(.*?)"', res)

    # if alist:
    #     pass
    # else:
    #     logger.info('网站返回网页部错误，内容为:%s' % str(companyInfo_soup))
    #     return ''

    # post_args_list = alist[0].replace('(','').replace(')','').replace("'",'').split(',')
    if res_list:
        post_args_list = list(res_list[0])
        # post_args_list.append(urllib.quote(companyName.encode(encoding='utf-8')))
        post_args_list.append(companyName)
        return post_args_list
    else:
        raise Exception("网站可能出现变化")


def _fetch_souped(req, url, timeout=30):
    """GET *url* on the given session and return html5lib-normalized markup.

    A network failure degrades to souping an empty body, so callers always
    receive a string (possibly a bare HTML skeleton), never an exception.
    """
    time.sleep(SLEEP_TIME)
    try:
        raw = req.get(url, timeout=timeout).content
    except:
        time.sleep(SLEEP_TIME)
        raw = ''
    return str(BeautifulSoup(raw, 'html5lib'))


def get_company_info(com_info):
    """Download the company's detail pages and annual-report pages.

    :param com_info: [qylx, nbxh, qylxFlag, zch, qymc] as produced by
                     download_captcha_kill()
    :return: raw-page dict {'province', 'type', 'html', 'yearList', ...},
             or None when com_info is empty
    """
    if not com_info:
        return None

    qylx = com_info[0]
    nbxh = com_info[1]
    qylxFlag = com_info[2]
    zch = com_info[3]
    qymc = com_info[4]

    raw_dict = {
        "province": "nx",
        "type": "1",
        "html": "",
        "yearList": [],
        "keyword": "",
        "companyName": "",
        "json": "",
    }
    raw_base_dict = {}
    referer_url = urllib.urlencode({
        'qylx': qylx,
        'nbxh': nbxh,
        'qylxFlag': qylxFlag,
        'zch': zch,
    })
    root_url = 'http://gsxt.ngsh.gov.cn/ECPS/'
    req = requests.session()
    req.headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Accept-Language': 'zh-CN,zh;q=0.8',
        'Connection': 'keep-alive',
        'Cache-Control': 'max-age=0',
        'Host': 'gsxt.ngsh.gov.cn',
        'Referer': root_url + 'qyxxgsAction_initQyxyxxMain.action?qylx=' + referer_url,
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': ua,
    }

    # Basic registration info (stored raw; cleaned later by extract_base_info)
    time.sleep(SLEEP_TIME)
    base_url = root_url + 'qyxxgsAction_initQyjbqk.action?nbxh=%s&qylx=%s' % (nbxh, qylx)
    raw_base_dict['base'] = req.get(base_url, timeout=30).content

    # Shareholder list plus one detail page per shareholder link
    time.sleep(SLEEP_TIME)
    share_url = root_url + 'tzrczxxAction_init.action?nbxh=%s&qylx=%s&currPage=1&maxPerPage=100' % (nbxh, qylx)
    raw_share_res = req.get(share_url, timeout=30).content
    raw_share_res = str(BeautifulSoup(raw_share_res, 'html5lib')) if raw_share_res else ''
    raw_base_dict['share'] = raw_share_res
    share_table_s = table.table_clean(raw_share_res, '股东信息')
    raw_share_detail_list = []
    for a_detail_url in re.findall('''href="(.*?)"''', share_table_s, re.S):
        a_detail_url = root_url + a_detail_url.replace("amp;", "")
        try:
            time.sleep(SLEEP_TIME)
            detail_res = req.get(a_detail_url, timeout=30).content
        except:
            time.sleep(SLEEP_TIME)
            detail_res = ''
        raw_share_detail_list.append(detail_res)
    # BUGFIX: this key was misspelled 'share_dtail', which hid the detail
    # pages from extract_base_info().
    raw_base_dict['share_detail'] = raw_share_detail_list

    # Remaining main-page sections.
    # BUGFIX: the alter and person URLs were missing the '&' before
    # 'currPage', which corrupted the qylx query parameter.
    main_sections = (
        ('alter', 'qybgxxAction_init.action?nbxh=%s&qylx=%s&currPage=1&maxPerPage=100' % (nbxh, qylx)),
        ('person', 'qybaxxAction_zyryxx.action?nbxh=%s&qylx=%s&currPage=1&maxPerPage=100' % (nbxh, qylx)),
        ('branch', 'qybaxxAction_fgsxx.action?nbxh=%s&qylx=%s' % (nbxh, qylx)),
        ('liquidation', 'qybaxxAction_qsxx.action?nbxh=%s&qylx=%s' % (nbxh, qylx)),
        ('abnormal', 'jyycxxAction_init.action?nbxh=' + nbxh),
        ('check', 'ccjcxxAction_init.action?nbxh=' + nbxh),
    )
    for key, path in main_sections:
        raw_base_dict[key] = _fetch_souped(req, root_url + path)

    raw_dict['html'] = raw_base_dict

    # Annual reports. nbxh must NOT be url-encoded (quoting alters it), so it
    # is concatenated raw in front of the urlencoded remainder.
    year_index_url = 'http://gsxt.ngsh.gov.cn/ECPS/qygsAction_initQygsMain.action?'
    year_query_url = 'nbxh=%s&' % nbxh + urllib.urlencode(
        {'zch': zch, 'qylxFlag': qylxFlag, 'qylx': qylx, 'qymc': qymc, })
    time.sleep(SLEEP_TIME)
    raw_year_index = req.get(year_index_url + year_query_url).content
    nbxh_2 = re.findall('<input type="hidden" name="nbxh" value="(.*?)"', raw_year_index)
    if not nbxh_2:  # no annual-report section exists for this company
        return raw_dict

    nbxh_2 = nbxh_2[0]
    years_url = 'http://gsxt.ngsh.gov.cn/ECPS/qyNbxxAction_init.action?'
    time.sleep(SLEEP_TIME)
    raw_year_res = req.get(years_url, params={'nbxh': nbxh_2, 'zch': zch, 'qymc': qymc}).content
    years_a_tag = BeautifulSoup(raw_year_res, 'html5lib').find_all('a')

    # (action name, output key, whether the page is paginated)
    # BUGFIX: paginated sub-pages were missing the '&' before 'currPage',
    # which corrupted the gsryflag query parameter.
    year_sections = (
        ('qyNbxxAction_qyjbxx.action', 'year_base', False),      # report basic info
        ('qyNbxxAction_wzxx.action', 'web', True),               # website / web-shop info
        ('qyNbxxAction_tzrczxx.action', 'investor', True),       # shareholders & contributions
        ('qyNbxxAction_dwtzxx.action', 'invest', True),          # outbound investments
        ('qyNbxxAction_qyzcxx.action', 'assets', False),         # asset status
        ('qyNbxxAction_dwdbxx.action', 'guarantee', False),      # guarantees provided
        ('qyNbxxAction_gqbgxx.action', 'equity_change', False),  # equity changes
        ('qyNbxxAction_xgjlxx.action', 'modify', True),          # modification records
    )
    raw_year_html_list = []
    for ayear_tag in years_a_tag:
        # Each <a> tag's text contains the report year (first digit run)
        njnd = str(re.compile(r'.*?(\d+).*').findall(ayear_tag.get_text())[0])
        raw_year_html_dict = {'year': njnd}
        for action, key, paged in year_sections:
            url = root_url + action + '?nbxh=' + nbxh + '&njnd=' + njnd + '&gsryflag='
            if paged:
                url += '&currPage=1&maxPerPage=100'
            raw_year_html_dict[key] = _fetch_souped(req, url)
        raw_year_html_list.append(raw_year_html_dict)

    raw_dict['yearList'] = raw_year_html_list

    return raw_dict


def clean_shareHolderList(a_table):
    """Parse the main-page shareholder table into a list of shareholder dicts.

    The first two <tr> rows are headers and are skipped; rows with fewer
    than two <td> cells are ignored.

    :param a_table: shareholder table HTML (output of table.table_clean)
    :return: list of shareHolderList-layout dicts ([] when there are no rows)
    """
    a_table = table.tr(a_table)
    # BUGFIX: the original ran `del detail[0]` twice unconditionally, which
    # raised IndexError when the table had fewer than two rows; slicing is
    # safe for any length.
    rows = re.findall('<tr>.*?</tr>', a_table)[2:]
    shareHolderList = []
    for row in rows:
        row = table.td_clean(row)
        tds = re.findall("<td>(.*?)</td>", row)
        if len(tds) < 2:  # malformed row: skip, matching the old try/except
            continue
        shareHolderList.append({
            "shareholderType": tds[0],
            "shareholderName": tds[1],
            'country': '',
            'subConam': '',
            'regCapCur': '',
            'conDate': '',
            'fundedRatio': '',
        })
    return shareHolderList


def extract_base_info(raw_dict):
    """Clean the main-page HTML bundle in raw_dict['html'] into the template dict.

    :param raw_dict: raw-page dict produced by get_company_info()
    :return: cleaned dict based on TE.void_base_dict
    :raises Exception: when raw_dict / its 'html' payload / the basic-info
                       table is missing
    """
    if not raw_dict:
        raise Exception("raw_dict 错误")

    raw_html = raw_dict.get("html", {})
    if not raw_html:
        raise Exception("raw_dict 错误")

    # Basic registration info
    raw_base = raw_html.get("base")
    raw_base_table = table.table_clean(raw_base, "基本信息")
    if not raw_base_table:
        raise Exception("基本信息错误")
    res_dict = copy.deepcopy(TE.void_base_dict)
    res_dict["basicList"] = table.index("基本信息", raw_base_table)
    res_dict["province"] = "nx"

    # Shareholder list from the main page (caption varies by company type)
    raw_share = raw_html.get("share", '')
    share_table = (table.table_clean(raw_share, "股东信息") or
                   table.table_clean(raw_share, "股东（发起人）信息")) if raw_share else ''
    share_holder_list_1 = clean_shareHolderList(share_table) if share_table else []

    # Per-shareholder detail pages.
    # BUGFIX: get_company_info() historically stored these under the
    # misspelled key 'share_dtail'; accept both spellings.
    raw_share_detail = raw_html.get("share_detail") or raw_html.get("share_dtail", [])
    for a_share_detail in raw_share_detail:
        a_share_detail = str(BeautifulSoup(a_share_detail, 'html5lib')) if a_share_detail else ''
        a_share_detail_table = re.findall("(<table.*?</table>)", a_share_detail, re.S)
        if not a_share_detail_table:
            continue
        trs = re.findall("(<tr.*?</tr>)", a_share_detail_table[0], re.S)
        if not (trs and len(trs) > 3):
            continue
        tds = re.findall("<td.*?>(.*?)</td>", trs[3], re.S)
        if not (tds and len(tds) == 9):
            continue
        shareHolder_dict = {
            'shareholderName': tds[0],
            'shareholderType': '',  # filled from the main-page entry below
            'country': '',
            'subConam': table.money_notclean(tds[1]) if tds[1] else '',  # subscribed capital (万元)
            'regCapCur': '',  # currency
            'conDate': table.parse_time(tds[5]) or table.parse_time(tds[8]),  # contribution date
            'fundedRatio': '',  # contribution ratio
        }
        # Merge the detail page with the matching main-page shareholder entry
        for iii, a_detail_1 in enumerate(share_holder_list_1):
            if tds[0] and a_detail_1.get("shareholderName", "") == tds[0]:
                shareHolder_dict["shareholderType"] = a_detail_1.get("shareholderType", "")
                share_holder_list_1[iii] = copy.deepcopy(shareHolder_dict)
    res_dict["shareHolderList"] = share_holder_list_1

    # Change records
    raw_alter = raw_html.get("alter", '')
    alter_table = table.table_clean(raw_alter, "变更信息") if raw_alter else ''
    res_dict['alterList'] = table.index("变更信息", alter_table) if alter_table else []

    # Key personnel.
    # BUGFIX: the "家庭成员信息" fallback previously cleaned raw_base (the
    # basic-info page) instead of the personnel page — copy-paste error.
    raw_person = raw_html.get("person", "")
    person_table = (table.table_clean(raw_person, "主要人员信息") or
                    table.table_clean(raw_person, "家庭成员信息")) if raw_person else ''
    res_dict['personList'] = table.index("主要人员信息", person_table) if person_table else []

    # Branches
    raw_branch = raw_html.get("branch", "")
    branch_table = table.table_clean(raw_branch, "分支机构信息") if raw_branch else ''
    res_dict['filiationList'] = table.index("分支机构信息", branch_table) if branch_table else []

    # Liquidation
    raw_liquidation = raw_html.get("liquidation", "")
    liquidation_table = table.table_clean(raw_liquidation, "清算信息") if raw_liquidation else ''
    res_dict['liquidationList'] = table.index("清算信息", liquidation_table) if liquidation_table else []

    # Abnormal-operation records
    raw_abnormal = raw_html.get("abnormal", "")
    raw_abnormal = str(BeautifulSoup(raw_abnormal, 'html5lib')) if raw_abnormal else ''
    abnormal_table = table.table_clean(raw_abnormal, "经营异常信息") if raw_abnormal else ''
    res_dict['abnormalOperation'] = table.index("经营异常信息", abnormal_table) if abnormal_table else []

    # Spot-check records
    raw_check = raw_html.get("check", "")
    raw_check = str(BeautifulSoup(raw_check, 'html5lib')) if raw_check else ''
    check_table = table.table_clean(raw_check, "抽查检查信息") if raw_check else ''
    res_dict['checkMessage'] = table.index("抽查检查信息", check_table) if check_table else []

    return res_dict


def extract_year_info(raw_dict):
    """Clean every annual-report page bundle in raw_dict['yearList'].

    :param raw_dict: raw-page dict produced by get_company_info()
    :return: list of cleaned per-year dicts (TE.void_year_dict layout)
    :raises Exception: when raw_dict is empty
    """
    if not raw_dict:
        raise Exception("raw_dict 错误")

    # (raw key, caption for table_clean, caption for report_index,
    #  output key, default when the table is absent)
    sections = (
        ("web", "网站或网店信息", "网站或网店信息", "website", {}),
        ("investor", "股东（发起人）及出资信息", "股东及出资信息", "investorInformations", []),
        ("invest", "对外投资信息", "对外投资信息", "entinvItemList", []),
        ("assets", "企业资产状况信息", "企业资产状况信息", "assetsInfo", {}),
        ("equity_change", "股权变更信息", "股权变更信息", "equityChangeInformations", []),
        ("modify", "修改记录", "修改记录", "changeRecords", []),
    )

    res_year_list = []
    for year_item in raw_dict.get("yearList", []):
        cleaned = copy.deepcopy(TE.void_year_dict)
        cleaned["year"] = year_item.get("year", "")

        # Basic info: the page is re-normalized through html5lib first and
        # the table caption varies between "企业基本信息" and "基本信息".
        base_html = year_item.get("year_base", "")
        base_html = str(BeautifulSoup(base_html, 'html5lib')) if base_html else ''
        base_table = table.table_clean(base_html, "企业基本信息") or table.table_clean(base_html, "基本信息")
        cleaned["baseInfo"] = table.report_index("企业基本信息", base_table) if base_table else {}

        # Remaining sections all follow the same clean-then-index pattern
        for raw_key, clean_cap, index_cap, out_key, default in sections:
            raw_html = year_item.get(raw_key, "")
            a_table = table.table_clean(raw_html, clean_cap) if raw_html else ''
            cleaned[out_key] = table.report_index(index_cap, a_table) if a_table else copy.deepcopy(default)

        res_year_list.append(cleaned)

    return res_year_list


def search2(companyName, MAXTIME=40):
    """Crack the captcha (retrying up to MAXTIME times), then download and
    clean all of the company's pages.

    :param companyName: company name (unicode)
    :param MAXTIME: maximum number of captcha-cracking attempts
    :return: None when the company does not exist; otherwise a tuple
             (raw_page_dict, cleaned_dict_or_None, gate_method)
    :raises Exception: when the captcha cannot be cracked within MAXTIME tries
    """
    res = ''
    a_time = MAXTIME
    while a_time > 0:
        if res is None:  # company does not exist
            return None
        elif res == '':  # wrong/empty captcha result: try again
            if a_time < MAXTIME:
                logger.error("重复破解验证码!当前设定重复破解次数为:%s, 剩余次数为:%s " % (MAXTIME, a_time))
            a_time -= 1
            try:
                res = download_captcha_kill(companyName)
            except Exception:
                # BUGFIX: traceback.print_exc() takes a line limit, not the
                # exception object; bare `raise` also preserves the traceback.
                traceback.print_exc()
                raise
        else:
            break
    if a_time <= 1 and res == '':
        raise Exception("多次破解验证码错误,当前设置次数为：%s" % MAXTIME)

    com_list = res
    res = get_company_info(com_list)
    raw_dict = res
    # Built once; companyName is upgraded to the extracted name on success
    gate_method = {
        'url': 'http://gsxt.ngsh.gov.cn/ECPS/',
        'method': 'get',
        'province': 'nx',
        'companyName': companyName,
        'data': com_list,
    }
    try:
        asic_dict = extract_base_info(raw_dict)
        year_list = extract_year_info(raw_dict)
        company_name = asic_dict['basicList'][0].get('enterpriseName', '') or companyName
        res['companyName'] = company_name
        asic_dict['yearReportList'] = year_list
        gate_method['companyName'] = company_name
        return res, asic_dict, gate_method
    except Exception as e:
        # Cleaning failed: still return the raw pages so nothing is lost
        logger.info(e)
        res['companyName'] = companyName
        return res, None, gate_method


def search(companyName):
    """Convenience wrapper: return only the cleaned dict from search2(),
    or None when the company does not exist."""
    result = search2(companyName)
    return result[1] if result else None


def search3(gate_method):
    """Re-crawl a company from a previously saved gate_method, skipping the
    captcha step entirely.

    :param gate_method: dict holding the saved 'data' code list and
                        'companyName' from an earlier search
    :return: (raw_page_dict, cleaned_dict_or_None, new_gate_method)
    :raises Exception: when gate_method lacks the 'data' key
    """
    if 'data' not in gate_method:
        raise Exception("gate_method error, doesn't have `data` key")
    com_list = gate_method.get('data')
    raw_dict = get_company_info(com_list)
    companyName = gate_method.get('companyName', '')

    new_gate = {
        'url': 'http://gsxt.ngsh.gov.cn/ECPS/',
        'method': 'get',
        'province': 'nx',
        'companyName': companyName,
        'data': com_list,
    }
    try:
        asic_dict = extract_base_info(raw_dict)
        year_list = extract_year_info(raw_dict)
        extracted = asic_dict['basicList'][0].get('enterpriseName', '')
        company_name = extracted if extracted else companyName
        raw_dict['companyName'] = company_name
        asic_dict['yearReportList'] = year_list
        new_gate['companyName'] = company_name
        return raw_dict, asic_dict, new_gate
    except Exception as e:
        logger.info(e)
        raw_dict['companyName'] = companyName
        return raw_dict, None, new_gate


if __name__ == "__main__":
    # Manual smoke test: crawl one known-good company and dump the result.
    # import pymongo
    # import json
    # # pymongo.MongoClient('192.168.31.121', 27017)
    # clientServer = pymongo.MongoClient('192.168.31.121',27017)
    # db = clientServer.crawler_company_name
    # collectionServer = db.companyName
    # reg_no_s = collectionServer.find({'province': 'nx'}).limit(100)
    # for reg in reg_no_s:
    #     print '#'*10
    #     print reg
    #     print '#'*10
    #     reg_no = reg['regNo']
    #     try:
    #         res = search(reg_no)
    #         print json.dumps(res, indent=4, ensure_ascii=False)
    #     except Exception, e:
    #         import traceback
    #         traceback.print_exc()
    #         # print reg
    #         import pdb
    #         pdb.set_trace()

    # companyName = u'宁夏英力特化工股份有限公司'
    # companyName = u'宁夏鸿兴会计师事务所'
    # companyName = u'吴忠市天龙电子器材销售中心'
    companyName = u'宁夏复甲商业资产运营有限公司'

    res = search2(companyName)
    import json

    # NOTE: Python 2 print statement — this module is Python 2 only.
    print json.dumps(res, indent=4, ensure_ascii=False)
    #
    #

    # #!/usr/bin/env python
    # import requests
    # import base64
    # auth = 'lum-customer-socialcredits-zone-gen:a98d2b7b4b0e'
    # print(requests.get('http://lumtest.com/myip.json', proxies = {'http': 'http://'+auth+'@zproxy.luminati.io:22225'},
    #     headers = {'Proxy-Authorization': 'Basic '+base64.b64encode(auth.encode('utf-8')).decode('utf-8')}).text)
    #
    #